Lecture

“gg” stands for “grammar of graphics”

Create the weather data

# Pull PRCP, TMIN and TMAX for three monitor IDs over calendar year 2017,
# add a readable `name` label for each id, and move `name` and `id` to the
# front of the table.
weather_df <-
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2017-01-01",
    date_max = "2017-12-31"
  ) %>%
  mutate(
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    ),
    # Raw temperatures are divided by 10 (presumably tenths of a degree ->
    # degrees; confirm against the GHCN-Daily documentation).
    tmin = tmin / 10,
    tmax = tmax / 10
  ) %>%
  select(name, id, everything())
## file path:          /Users/SigL/Library/Caches/rnoaa/ghcnd/USW00094728.dly
## file last updated:  2019-09-26 10:25:27
## file min/max dates: 1869-01-01 / 2019-09-30
## file path:          /Users/SigL/Library/Caches/rnoaa/ghcnd/USC00519397.dly
## file last updated:  2019-09-26 10:25:41
## file min/max dates: 1965-01-01 / 2019-09-30
## file path:          /Users/SigL/Library/Caches/rnoaa/ghcnd/USS0023B17S.dly
## file last updated:  2019-09-26 10:25:46
## file min/max dates: 1999-09-01 / 2019-09-30
# Print the resulting tibble.
weather_df
## # A tibble: 1,095 x 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2017-01-01     0   8.9   4.4
##  2 CentralPark_NY USW00094728 2017-01-02    53   5     2.8
##  3 CentralPark_NY USW00094728 2017-01-03   147   6.1   3.9
##  4 CentralPark_NY USW00094728 2017-01-04     0  11.1   1.1
##  5 CentralPark_NY USW00094728 2017-01-05     0   1.1  -2.7
##  6 CentralPark_NY USW00094728 2017-01-06    13   0.6  -3.8
##  7 CentralPark_NY USW00094728 2017-01-07    81  -3.2  -6.6
##  8 CentralPark_NY USW00094728 2017-01-08     0  -3.8  -8.8
##  9 CentralPark_NY USW00094728 2017-01-09     0  -4.9  -9.9
## 10 CentralPark_NY USW00094728 2017-01-10     0   7.8  -6  
## # … with 1,085 more rows

Create a ggplot

# Scatterplot of tmax against tmin; data and mapping are passed by name.
ggplot(
  data = weather_df,
  mapping = aes(x = tmin, y = tmax)
) +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

An alternate way of making this plot: pipe the data frame into ggplot()

# Same scatterplot, with the data frame piped in as ggplot()'s first argument.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point()
## Warning: Removed 15 rows containing missing values (geom_point).

adding color

# Color the points by `name` and make them semi-transparent.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point(
    mapping = aes(color = name),
    alpha = 0.4
  )
## Warning: Removed 15 rows containing missing values (geom_point).

alpha controls transparency (0 = fully transparent, 1 = fully opaque)

Why do aes positions matter?

# `color = name` is mapped inside geom_point() only, so geom_smooth() does not
# inherit it.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_point(
    mapping = aes(color = name),
    alpha = 0.4
  ) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

# `color = name` is mapped in ggplot(), so both the point layer and the smooth
# layer inherit it.
weather_df %>%
  ggplot(
    mapping = aes(x = tmin, y = tmax, color = name)
  ) +
  geom_point(alpha = 0.4) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).

## Warning: Removed 15 rows containing missing values (geom_point).

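`se = FALSE` hides the standard-error (confidence) band around the smooth. In the former plot, `color = name` is set only inside `geom_point()`, so only the points are colored and a single smooth is fitted to all of the data; in the latter, `color = name` is set in `ggplot()`, so it applies to every geom and a separate smooth is drawn for each station.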

facet!

# Same colored scatterplot with smooths, split into one column panel per
# `name`.
weather_df %>%
  ggplot(
    mapping = aes(x = tmin, y = tmax, color = name)
  ) +
  geom_point(alpha = 0.4) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

– Faceting is good for reading each station on its own, but it makes direct comparison between stations harder

The plots above are fine, but they only relate minimum to maximum temperature (a relationship we would expect to be roughly linear). Next, plot the variables against date.

# tmax over time, colored by `name`, with a smooth per color group.
weather_df %>%
  ggplot(
    mapping = aes(x = date, y = tmax, color = name)
  ) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
## Warning: Removed 3 rows containing missing values (geom_point).

# tmax over time; point size is mapped to prcp, and the smooth is drawn with
# size 2.
weather_df %>%
  ggplot(
    mapping = aes(x = date, y = tmax, color = name)
  ) +
  geom_point(
    mapping = aes(size = prcp),
    alpha = 0.3
  ) +
  geom_smooth(se = FALSE, size = 2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).

## Warning: Removed 3 rows containing missing values (geom_point).

# prcp over time, colored by `name`, with a smooth per color group.
weather_df %>%
  ggplot(
    mapping = aes(x = date, y = prcp, color = name)
  ) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).

## Warning: Removed 3 rows containing missing values (geom_point).

2d density

# Hexagonal binning of tmin against tmax.
weather_df %>%
  ggplot(mapping = aes(x = tmin, y = tmax)) +
  geom_hex()
## Warning: Removed 15 rows containing non-finite values (stat_binhex).

# Hexagonal binning with `color` mapped to `name`, one column panel per `name`.
weather_df %>%
  ggplot(
    mapping = aes(x = tmin, y = tmax, color = name)
  ) +
  geom_hex() +
  facet_grid(. ~ name)
## Warning: Removed 15 rows containing non-finite values (stat_binhex).

geom_bin2d() is similar to geom_hex(), but uses rectangular bins instead of hexagons

More kinds of plots

# Histogram of tmax with `color` (not `fill`) mapped to `name`.
weather_df %>%
  ggplot(mapping = aes(x = tmax, color = name)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

This does not look right. For a histogram there is a difference between “color” and “fill”: “color” sets the outline of the bars, while “fill” sets their interior.

# Histogram of tmax with `fill` mapped to `name`.
weather_df %>%
  ggplot(mapping = aes(x = tmax, fill = name)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

# Histogram of tmax filled by `name`, using the "dodge" position adjustment.
weather_df %>%
  ggplot(mapping = aes(x = tmax, fill = name)) +
  geom_histogram(
    position = "dodge"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

# Histogram of tmax filled by `name`, one column panel per `name`.
weather_df %>%
  ggplot(mapping = aes(x = tmax, fill = name)) +
  geom_histogram() +
  facet_grid(. ~ name)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing non-finite values (stat_bin).

density plots

# Semi-transparent density curves of tmax, filled by `name`.
weather_df %>%
  ggplot(mapping = aes(x = tmax, fill = name)) +
  geom_density(alpha = 0.3)
## Warning: Removed 3 rows containing non-finite values (stat_density).

boxplots

# One boxplot of tmax per `name`.
weather_df %>%
  ggplot(mapping = aes(x = name, y = tmax)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

violin plots

# One violin plot of tmax per `name`.
weather_df %>%
  ggplot(mapping = aes(x = name, y = tmax)) +
  geom_violin()
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).

ridge plots

# Ridge plot: tmax on the x axis, one ridge per `name` on the y axis.
weather_df %>%
  ggplot(mapping = aes(x = tmax, y = name)) +
  geom_density_ridges()
## Picking joint bandwidth of 1.84
## Warning: Removed 3 rows containing non-finite values (stat_density_ridges).